diff options
Diffstat (limited to 'drivers/net/virtio')
20 files changed, 1487 insertions, 452 deletions
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile index 97972a6c..b21b8781 100644 --- a/drivers/net/virtio/Makefile +++ b/drivers/net/virtio/Makefile @@ -60,14 +60,10 @@ endif ifeq ($(CONFIG_RTE_VIRTIO_USER),y) SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_user.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/vhost_kernel_tap.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user/virtio_user_dev.c SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_user_ethdev.c endif -# this lib depends upon: -DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether -DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf -DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net -DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_kvargs - include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c index f5961ab7..983b95f1 100644 --- a/drivers/net/virtio/virtio_ethdev.c +++ b/drivers/net/virtio/virtio_ethdev.c @@ -38,6 +38,7 @@ #include <unistd.h> #include <rte_ethdev.h> +#include <rte_ethdev_pci.h> #include <rte_memcpy.h> #include <rte_string_fns.h> #include <rte_memzone.h> @@ -86,7 +87,7 @@ static void virtio_dev_stats_reset(struct rte_eth_dev *dev); static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); static int virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on); -static void virtio_mac_addr_add(struct rte_eth_dev *dev, +static int virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, uint32_t index, uint32_t vmdq __rte_unused); static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); @@ -485,11 +486,11 @@ virtio_init_queue(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx) hw->cvq = cvq; } - /* For virtio_user case (that is when dev->pci_dev is NULL), we use + /* For virtio_user case (that is when hw->dev is NULL), we use * virtual address. And we need properly set _offset_, please see * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. */ - if (dev->pci_dev) + if (!hw->virtio_user_dev) vq->offset = offsetof(struct rte_mbuf, buf_physaddr); else { vq->vq_ring_mem = (uintptr_t)mz->addr; @@ -545,6 +546,9 @@ virtio_free_queues(struct virtio_hw *hw) int queue_type; uint16_t i; + if (hw->vqs == NULL) + return; + for (i = 0; i < nr_vq; i++) { vq = hw->vqs[i]; if (!vq) @@ -563,9 +567,11 @@ virtio_free_queues(struct virtio_hw *hw) } rte_free(vq); + hw->vqs[i] = NULL; } rte_free(hw->vqs); + hw->vqs = NULL; } static int @@ -593,16 +599,29 @@ virtio_alloc_queues(struct rte_eth_dev *dev) return 0; } +static void virtio_queues_unbind_intr(struct rte_eth_dev *dev); + static void virtio_dev_close(struct rte_eth_dev *dev) { struct virtio_hw *hw = dev->data->dev_private; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "virtio_dev_close"); /* reset the NIC */ if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + VTPCI_OPS(hw)->set_config_irq(hw, VIRTIO_MSI_NO_VECTOR); + if (intr_conf->rxq) + virtio_queues_unbind_intr(dev); + + if (intr_conf->lsc || intr_conf->rxq) { + rte_intr_disable(dev->intr_handle); + rte_intr_efd_disable(dev->intr_handle); + rte_free(dev->intr_handle->intr_vec); + dev->intr_handle->intr_vec = NULL; + } + vtpci_reset(hw); virtio_dev_free_mbufs(dev); virtio_free_queues(hw); @@ -617,7 +636,7 @@ virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -640,7 +659,7 @@ virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -663,7 +682,7 @@ virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -686,7 +705,7 @@ virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) int ret; if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { - PMD_INIT_LOG(INFO, "host does not support rx control\n"); + PMD_INIT_LOG(INFO, "host does not support rx control"); return; } @@ -708,15 +727,38 @@ virtio_mtu_set(struct rte_eth_dev *dev, uint16_t mtu) uint32_t ether_hdr_len = ETHER_HDR_LEN + VLAN_TAG_LEN + hw->vtnet_hdr_size; uint32_t frame_size = mtu + ether_hdr_len; + uint32_t max_frame_size = hw->max_mtu + ether_hdr_len; + + max_frame_size = RTE_MIN(max_frame_size, VIRTIO_MAX_RX_PKTLEN); - if (mtu < ETHER_MIN_MTU || frame_size > VIRTIO_MAX_RX_PKTLEN) { - PMD_INIT_LOG(ERR, "MTU should be between %d and %d\n", - ETHER_MIN_MTU, VIRTIO_MAX_RX_PKTLEN - ether_hdr_len); + if (mtu < ETHER_MIN_MTU || frame_size > max_frame_size) { + PMD_INIT_LOG(ERR, "MTU should be between %d and %d", + ETHER_MIN_MTU, max_frame_size - ether_hdr_len); return -EINVAL; } return 0; } +static int +virtio_dev_rx_queue_intr_enable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_enable_intr(vq); + return 0; +} + +static int +virtio_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id) +{ + struct virtnet_rx *rxvq = dev->data->rx_queues[queue_id]; + struct virtqueue *vq = rxvq->vq; + + virtqueue_disable_intr(vq); + return 0; +} + /* * dev_ops for virtio, bare necessities for basic operation */ @@ -738,7 +780,10 @@ static const struct eth_dev_ops virtio_eth_dev_ops = { .xstats_reset = virtio_dev_stats_reset, .link_update = virtio_dev_link_update, .rx_queue_setup = virtio_dev_rx_queue_setup, + .rx_queue_intr_enable = virtio_dev_rx_queue_intr_enable, + .rx_queue_intr_disable = virtio_dev_rx_queue_intr_disable, .rx_queue_release = virtio_dev_queue_release, + .rx_descriptor_done = virtio_dev_rx_queue_done, .tx_queue_setup = virtio_dev_tx_queue_setup, .tx_queue_release = virtio_dev_queue_release, /* collect stats per queue */ @@ -980,7 +1025,7 @@ virtio_get_hwaddr(struct virtio_hw *hw) } } -static void +static int virtio_mac_table_set(struct virtio_hw *hw, const struct virtio_net_ctrl_mac *uc, const struct virtio_net_ctrl_mac *mc) @@ -990,7 +1035,7 @@ virtio_mac_table_set(struct virtio_hw *hw, if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { PMD_DRV_LOG(INFO, "host does not support mac table"); - return; + return -1; } ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; @@ -1005,9 +1050,10 @@ virtio_mac_table_set(struct virtio_hw *hw, err = virtio_send_command(hw->cvq, &ctrl, len, 2); if (err != 0) PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err); + return err; } -static void +static int virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, uint32_t index, uint32_t vmdq __rte_unused) { @@ -1018,7 +1064,7 @@ virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, if (index >= VIRTIO_MAX_MAC_ADDRS) { PMD_DRV_LOG(ERR, "mac address index %u out of range", index); - return; + return -EINVAL; } uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); @@ -1035,7 +1081,7 @@ virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN); } - virtio_mac_table_set(hw, uc, mc); + return virtio_mac_table_set(hw, uc, mc); } static void @@ -1122,6 +1168,18 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, host_features); + /* If supported, ensure MTU value is valid before acknowledging it. */ + if (host_features & req_features & (1ULL << VIRTIO_NET_F_MTU)) { + struct virtio_net_config config; + + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mtu), + &config.mtu, sizeof(config.mtu)); + + if (config.mtu < ETHER_MIN_MTU) + req_features &= ~(1ULL << VIRTIO_NET_F_MTU); + } + /* * Negotiate features: Subset of device feature bits are written back * guest feature bits. @@ -1154,9 +1212,8 @@ virtio_negotiate_features(struct virtio_hw *hw, uint64_t req_features) * Process Virtio Config changed interrupt and call the callback * if link state changed. */ -static void -virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, - void *param) +void +virtio_interrupt_handler(void *param) { struct rte_eth_dev *dev = param; struct virtio_hw *hw = dev->data->dev_private; @@ -1166,7 +1223,7 @@ virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, isr = vtpci_isr(hw); PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) + if (rte_intr_enable(dev->intr_handle) < 0) PMD_DRV_LOG(ERR, "interrupt enable failed"); if (isr & VIRTIO_PCI_ISR_CONFIG) { @@ -1187,6 +1244,95 @@ rx_func_get(struct rte_eth_dev *eth_dev) eth_dev->rx_pkt_burst = &virtio_recv_pkts; } +/* Only support 1:1 queue/interrupt mapping so far. + * TODO: support n:1 queue/interrupt mapping when there are limited number of + * interrupt vectors (<N+1). + */ +static int +virtio_queues_bind_intr(struct rte_eth_dev *dev) +{ + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(INFO, "queue/interrupt binding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) { + dev->intr_handle->intr_vec[i] = i + 1; + if (VTPCI_OPS(hw)->set_queue_irq(hw, hw->vqs[i * 2], i + 1) == + VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set queue vector"); + return -EBUSY; + } + } + + return 0; +} + +static void +virtio_queues_unbind_intr(struct rte_eth_dev *dev) +{ + uint32_t i; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(INFO, "queue/interrupt unbinding"); + for (i = 0; i < dev->data->nb_rx_queues; ++i) + VTPCI_OPS(hw)->set_queue_irq(hw, + hw->vqs[i * VTNET_CQ], + VIRTIO_MSI_NO_VECTOR); +} + +static int +virtio_configure_intr(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (!rte_intr_cap_multiple(dev->intr_handle)) { + PMD_INIT_LOG(ERR, "Multiple intr vector not supported"); + return -ENOTSUP; + } + + if (rte_intr_efd_enable(dev->intr_handle, dev->data->nb_rx_queues)) { + PMD_INIT_LOG(ERR, "Fail to create eventfd"); + return -1; + } + + if (!dev->intr_handle->intr_vec) { + dev->intr_handle->intr_vec = + rte_zmalloc("intr_vec", + hw->max_queue_pairs * sizeof(int), 0); + if (!dev->intr_handle->intr_vec) { + PMD_INIT_LOG(ERR, "Failed to allocate %u rxq vectors", + hw->max_queue_pairs); + return -ENOMEM; + } + } + + /* Re-register callback to update max_intr */ + rte_intr_callback_unregister(dev->intr_handle, + virtio_interrupt_handler, + dev); + rte_intr_callback_register(dev->intr_handle, + virtio_interrupt_handler, + dev); + + /* DO NOT try to remove this! This function will enable msix, or QEMU + * will encounter SIGSEGV when DRIVER_OK is sent. + * And for legacy devices, this should be done before queue/vec binding + * to change the config size from 20 to 24, or VIRTIO_MSI_QUEUE_VECTOR + * (22) will be ignored. + */ + if (rte_intr_enable(dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return -1; + } + + if (virtio_queues_bind_intr(dev) < 0) { + PMD_INIT_LOG(ERR, "Failed to bind queue/interrupt"); + return -1; + } + + return 0; +} + /* reset device and renegotiate features if needed */ static int virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) @@ -1194,7 +1340,7 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) struct virtio_hw *hw = eth_dev->data->dev_private; struct virtio_net_config *config; struct virtio_net_config local_config; - struct rte_pci_device *pci_dev = eth_dev->pci_dev; + struct rte_pci_device *pci_dev = NULL; int ret; /* Reset the device although not necessary at startup */ @@ -1208,13 +1354,17 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) if (virtio_negotiate_features(hw, req_features) < 0) return -1; - rte_eth_copy_pci_info(eth_dev, pci_dev); + if (!hw->virtio_user_dev) { + pci_dev = RTE_DEV_TO_PCI(eth_dev->device); + rte_eth_copy_pci_info(eth_dev, pci_dev); + } - /* If host does not support status then disable LSC */ - if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) - eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; - else + eth_dev->data->dev_flags = RTE_ETH_DEV_DETACHABLE; + /* If host does not support both status and MSI-X then disable LSC */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS) && hw->use_msix) eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + else + eth_dev->data->dev_flags &= ~RTE_ETH_DEV_INTR_LSC; rx_func_get(eth_dev); @@ -1264,6 +1414,32 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) hw->max_queue_pairs = config->max_virtqueue_pairs; + if (vtpci_with_feature(hw, VIRTIO_NET_F_MTU)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mtu), + &config->mtu, + sizeof(config->mtu)); + + /* + * MTU value has already been checked at negotiation + * time, but check again in case it has changed since + * then, which should not happen. + */ + if (config->mtu < ETHER_MIN_MTU) { + PMD_INIT_LOG(ERR, "invalid max MTU value (%u)", + config->mtu); + return -1; + } + + hw->max_mtu = config->mtu; + /* Set initial MTU to maximum one supported by vhost */ + eth_dev->data->mtu = config->mtu; + + } else { + hw->max_mtu = VIRTIO_MAX_RX_PKTLEN - ETHER_HDR_LEN - + VLAN_TAG_LEN - hw->vtnet_hdr_size; + } + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", config->max_virtqueue_pairs); PMD_INIT_LOG(DEBUG, "config->status=%d", config->status); @@ -1280,6 +1456,14 @@ virtio_init_device(struct rte_eth_dev *eth_dev, uint64_t req_features) ret = virtio_alloc_queues(eth_dev); if (ret < 0) return ret; + + if (eth_dev->data->dev_conf.intr_conf.rxq) { + if (virtio_configure_intr(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "failed to configure interrupt"); + return -1; + } + } + vtpci_reinit_complete(hw); if (pci_dev) @@ -1301,7 +1485,7 @@ virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) if (hw->modern) { /* * We don't have to re-parse the PCI config space, since - * rte_eal_pci_map_device() makes sure the mapped address + * rte_pci_map_device() makes sure the mapped address * in secondary process would equal to the one mapped in * the primary process: error will be returned if that * requirement is not met. @@ -1310,12 +1494,12 @@ virtio_remap_pci(struct rte_pci_device *pci_dev, struct virtio_hw *hw) * (such as dev_cfg, common_cfg, etc.) parsed from the * primary process, which is stored in shared memory. */ - if (rte_eal_pci_map_device(pci_dev)) { + if (rte_pci_map_device(pci_dev)) { PMD_INIT_LOG(DEBUG, "failed to map pci device!"); return -1; } } else { - if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) + if (rte_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) return -1; } @@ -1344,8 +1528,6 @@ int eth_virtio_dev_init(struct rte_eth_dev *eth_dev) { struct virtio_hw *hw = eth_dev->data->dev_private; - struct rte_pci_device *pci_dev; - uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; int ret; RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); @@ -1355,7 +1537,8 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) if (rte_eal_process_type() == RTE_PROC_SECONDARY) { if (!hw->virtio_user_dev) { - ret = virtio_remap_pci(eth_dev->pci_dev, hw); + ret = virtio_remap_pci(RTE_DEV_TO_PCI(eth_dev->device), + hw); if (ret) return ret; } @@ -1379,17 +1562,16 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) return -ENOMEM; } - pci_dev = eth_dev->pci_dev; hw->port_id = eth_dev->data->port_id; - - if (pci_dev) { - ret = vtpci_init(pci_dev, hw, &dev_flags); + /* For virtio_user case the hw->virtio_user_dev is populated by + * virtio_user_eth_dev_alloc() before eth_virtio_dev_init() is called. + */ + if (!hw->virtio_user_dev) { + ret = vtpci_init(RTE_DEV_TO_PCI(eth_dev->device), hw); if (ret) return ret; } - eth_dev->data->dev_flags = dev_flags; - /* reset device and negotiate default features */ ret = virtio_init_device(eth_dev, VIRTIO_PMD_DEFAULT_GUEST_FEATURES); if (ret < 0) @@ -1397,7 +1579,7 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) /* Setup interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_register(&pci_dev->intr_handle, + rte_intr_callback_register(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev); return 0; @@ -1406,8 +1588,6 @@ eth_virtio_dev_init(struct rte_eth_dev *eth_dev) static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) { - struct rte_pci_device *pci_dev; - PMD_INIT_FUNC_TRACE(); if (rte_eal_process_type() == RTE_PROC_SECONDARY) @@ -1415,7 +1595,6 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) virtio_dev_stop(eth_dev); virtio_dev_close(eth_dev); - pci_dev = eth_dev->pci_dev; eth_dev->dev_ops = NULL; eth_dev->tx_pkt_burst = NULL; @@ -1426,29 +1605,37 @@ eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) /* reset interrupt callback */ if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - rte_intr_callback_unregister(&pci_dev->intr_handle, + rte_intr_callback_unregister(eth_dev->intr_handle, virtio_interrupt_handler, eth_dev); - rte_eal_pci_unmap_device(pci_dev); + if (eth_dev->device) + rte_pci_unmap_device(RTE_DEV_TO_PCI(eth_dev->device)); PMD_INIT_LOG(DEBUG, "dev_uninit completed"); return 0; } -static struct eth_driver rte_virtio_pmd = { - .pci_drv = { - .driver = { - .name = "net_virtio", - }, - .id_table = pci_id_virtio_map, - .drv_flags = RTE_PCI_DRV_DETACHABLE, - .probe = rte_eth_dev_pci_probe, - .remove = rte_eth_dev_pci_remove, +static int eth_virtio_pci_probe(struct rte_pci_driver *pci_drv __rte_unused, + struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_probe(pci_dev, sizeof(struct virtio_hw), + eth_virtio_dev_init); +} + +static int eth_virtio_pci_remove(struct rte_pci_device *pci_dev) +{ + return rte_eth_dev_pci_generic_remove(pci_dev, eth_virtio_dev_uninit); +} + +static struct rte_pci_driver rte_virtio_pmd = { + .driver = { + .name = "net_virtio", }, - .eth_dev_init = eth_virtio_dev_init, - .eth_dev_uninit = eth_virtio_dev_uninit, - .dev_private_size = sizeof(struct virtio_hw), + .id_table = pci_id_virtio_map, + .drv_flags = 0, + .probe = eth_virtio_pci_probe, + .remove = eth_virtio_pci_remove, }; RTE_INIT(rte_virtio_pmd_init); @@ -1460,7 +1647,7 @@ rte_virtio_pmd_init(void) return; } - rte_eal_pci_register(&rte_virtio_pmd.pci_drv); + rte_pci_register(&rte_virtio_pmd); } /* @@ -1520,7 +1707,9 @@ virtio_dev_configure(struct rte_eth_dev *dev) } if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) - if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { + /* Enable vector (0) for Link State Intrerrupt */ + if (VTPCI_OPS(hw)->set_config_irq(hw, 0) == + VIRTIO_MSI_NO_VECTOR) { PMD_DRV_LOG(ERR, "failed to set config vector"); return -EBUSY; } @@ -1543,16 +1732,22 @@ virtio_dev_start(struct rte_eth_dev *dev) PMD_DRV_LOG(ERR, "link status not supported by host"); return -ENOTSUP; } + } - if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) { + /* Enable uio/vfio intr/eventfd mapping: althrough we already did that + * in device configure, but it could be unmapped when device is + * stopped. + */ + if (dev->data->dev_conf.intr_conf.lsc || + dev->data->dev_conf.intr_conf.rxq) { + rte_intr_disable(dev->intr_handle); + + if (rte_intr_enable(dev->intr_handle) < 0) { PMD_DRV_LOG(ERR, "interrupt enable failed"); return -EIO; } } - /* Initialize Link state */ - virtio_dev_link_update(dev, 0); - /*Notify the backend *Otherwise the tap backend might already stop its queue due to fullness. *vhost backend will have no chance to be waked up @@ -1582,6 +1777,11 @@ virtio_dev_start(struct rte_eth_dev *dev) VIRTQUEUE_DUMP(txvq->vq); } + hw->started = 1; + + /* Initialize Link state */ + virtio_dev_link_update(dev, 0); + return 0; } @@ -1636,13 +1836,16 @@ static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) static void virtio_dev_stop(struct rte_eth_dev *dev) { + struct virtio_hw *hw = dev->data->dev_private; struct rte_eth_link link; + struct rte_intr_conf *intr_conf = &dev->data->dev_conf.intr_conf; PMD_INIT_LOG(DEBUG, "stop"); - if (dev->data->dev_conf.intr_conf.lsc) - rte_intr_disable(&dev->pci_dev->intr_handle); + if (intr_conf->lsc || intr_conf->rxq) + rte_intr_disable(dev->intr_handle); + hw->started = 0; memset(&link, 0, sizeof(link)); virtio_dev_atomic_write_link_status(dev, &link); } @@ -1659,7 +1862,9 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet link.link_duplex = ETH_LINK_FULL_DUPLEX; link.link_speed = SPEED_10G; - if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + if (hw->started == 0) { + link.link_status = ETH_LINK_DOWN; + } else if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { PMD_INIT_LOG(DEBUG, "Get link status from hw"); vtpci_read_dev_config(hw, offsetof(struct virtio_net_config, status), @@ -1684,13 +1889,12 @@ virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complet static void virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) { - uint64_t tso_mask; + uint64_t tso_mask, host_features; struct virtio_hw *hw = dev->data->dev_private; - if (dev->pci_dev) - dev_info->driver_name = dev->driver->pci_drv.driver.name; - else - dev_info->driver_name = "virtio_user PMD"; + dev_info->speed_capa = ETH_LINK_SPEED_10G; /* fake value */ + + dev_info->pci_dev = dev->device ? RTE_DEV_TO_PCI(dev->device) : NULL; dev_info->max_rx_queues = RTE_MIN(hw->max_queue_pairs, VIRTIO_MAX_RX_QUEUES); dev_info->max_tx_queues = @@ -1701,18 +1905,25 @@ virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) dev_info->default_txconf = (struct rte_eth_txconf) { .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS }; - dev_info->rx_offload_capa = - DEV_RX_OFFLOAD_TCP_CKSUM | - DEV_RX_OFFLOAD_UDP_CKSUM | - DEV_RX_OFFLOAD_TCP_LRO; - dev_info->tx_offload_capa = 0; + host_features = VTPCI_OPS(hw)->get_features(hw); + dev_info->rx_offload_capa = 0; + if (host_features & (1ULL << VIRTIO_NET_F_GUEST_CSUM)) { + dev_info->rx_offload_capa |= + DEV_RX_OFFLOAD_TCP_CKSUM | + DEV_RX_OFFLOAD_UDP_CKSUM; + } + tso_mask = (1ULL << VIRTIO_NET_F_GUEST_TSO4) | + (1ULL << VIRTIO_NET_F_GUEST_TSO6); + if ((host_features & tso_mask) == tso_mask) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_LRO; + + dev_info->tx_offload_capa = 0; if (hw->guest_features & (1ULL << VIRTIO_NET_F_CSUM)) { dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_UDP_CKSUM | DEV_TX_OFFLOAD_TCP_CKSUM; } - tso_mask = (1ULL << VIRTIO_NET_F_HOST_TSO4) | (1ULL << VIRTIO_NET_F_HOST_TSO6); if ((hw->guest_features & tso_mask) == tso_mask) @@ -1732,3 +1943,4 @@ __rte_unused uint8_t is_rx) RTE_PMD_EXPORT_NAME(net_virtio, __COUNTER__); RTE_PMD_REGISTER_PCI_TABLE(net_virtio, pci_id_virtio_map); +RTE_PMD_REGISTER_KMOD_DEP(net_virtio, "* igb_uio | uio_pci_generic | vfio"); diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h index 4feccf93..c3413c6d 100644 --- a/drivers/net/virtio/virtio_ethdev.h +++ b/drivers/net/virtio/virtio_ethdev.h @@ -51,7 +51,7 @@ #define VIRTIO_MAX_TX_QUEUES 128U #define VIRTIO_MAX_MAC_ADDRS 64 #define VIRTIO_MIN_RX_BUFSIZE 64 -#define VIRTIO_MAX_RX_PKTLEN 9728 +#define VIRTIO_MAX_RX_PKTLEN 9728U /* Features desired/implemented by this driver. */ #define VIRTIO_PMD_DEFAULT_GUEST_FEATURES \ @@ -66,6 +66,7 @@ 1u << VIRTIO_NET_F_HOST_TSO4 | \ 1u << VIRTIO_NET_F_HOST_TSO6 | \ 1u << VIRTIO_NET_F_MRG_RXBUF | \ + 1u << VIRTIO_NET_F_MTU | \ 1u << VIRTIO_RING_F_INDIRECT_DESC | \ 1ULL << VIRTIO_F_VERSION_1 | \ 1ULL << VIRTIO_F_IOMMU_PLATFORM) @@ -83,6 +84,9 @@ void virtio_dev_cq_start(struct rte_eth_dev *dev); /* * RX/TX function prototypes */ + +int virtio_dev_rx_queue_done(void *rxq, uint16_t offset); + int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, uint16_t nb_rx_desc, unsigned int socket_id, const struct rte_eth_rxconf *rx_conf, @@ -109,4 +113,6 @@ uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); +void virtio_interrupt_handler(void *param); + #endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c index 8d5355c7..b7b3d615 100644 --- a/drivers/net/virtio/virtio_pci.c +++ b/drivers/net/virtio/virtio_pci.c @@ -37,6 +37,8 @@ #include <fcntl.h> #endif +#include <rte_io.h> + #include "virtio_pci.h" #include "virtio_logs.h" #include "virtqueue.h" @@ -48,6 +50,7 @@ */ #define PCI_CAPABILITY_LIST 0x34 #define PCI_CAP_ID_VNDR 0x09 +#define PCI_CAP_ID_MSIX 0x11 /* * The remaining space is defined by each driver as the per-driver @@ -92,17 +95,17 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, while (length > 0) { if (length >= 4) { size = 4; - rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, + rte_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); } else if (length >= 2) { size = 2; - rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, + rte_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); } else { size = 1; - rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, size, + rte_pci_ioport_read(VTPCI_IO(hw), dst, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -111,8 +114,8 @@ legacy_read_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_read(VTPCI_IO(hw), dst, length, - VIRTIO_PCI_CONFIG(hw) + offset); + rte_pci_ioport_read(VTPCI_IO(hw), dst, length, + VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -131,16 +134,16 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, if (length >= 4) { size = 4; tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); - rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size, + rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u32, size, VIRTIO_PCI_CONFIG(hw) + offset); } else if (length >= 2) { size = 2; tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); - rte_eal_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size, + rte_pci_ioport_write(VTPCI_IO(hw), &tmp.u16, size, VIRTIO_PCI_CONFIG(hw) + offset); } else { size = 1; - rte_eal_pci_ioport_write(VTPCI_IO(hw), src, size, + rte_pci_ioport_write(VTPCI_IO(hw), src, size, VIRTIO_PCI_CONFIG(hw) + offset); } @@ -149,8 +152,8 @@ legacy_write_dev_config(struct virtio_hw *hw, size_t offset, length -= size; } #else - rte_eal_pci_ioport_write(VTPCI_IO(hw), src, length, - VIRTIO_PCI_CONFIG(hw) + offset); + rte_pci_ioport_write(VTPCI_IO(hw), src, length, + VIRTIO_PCI_CONFIG(hw) + offset); #endif } @@ -159,8 +162,7 @@ legacy_get_features(struct virtio_hw *hw) { uint32_t dst; - rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 4, - VIRTIO_PCI_HOST_FEATURES); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 4, VIRTIO_PCI_HOST_FEATURES); return dst; } @@ -172,8 +174,8 @@ legacy_set_features(struct virtio_hw *hw, uint64_t features) "only 32 bit features are allowed for legacy virtio!"); return; } - rte_eal_pci_ioport_write(VTPCI_IO(hw), &features, 4, - VIRTIO_PCI_GUEST_FEATURES); + rte_pci_ioport_write(VTPCI_IO(hw), &features, 4, + VIRTIO_PCI_GUEST_FEATURES); } static uint8_t @@ -181,14 +183,14 @@ legacy_get_status(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_STATUS); return dst; } static void legacy_set_status(struct virtio_hw *hw, uint8_t status) { - rte_eal_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS); + rte_pci_ioport_write(VTPCI_IO(hw), &status, 1, VIRTIO_PCI_STATUS); } static void @@ -202,7 +204,7 @@ legacy_get_isr(struct virtio_hw *hw) { uint8_t dst; - rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 1, VIRTIO_PCI_ISR); return dst; } @@ -212,10 +214,20 @@ legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) { uint16_t dst; - rte_eal_pci_ioport_write(VTPCI_IO(hw), &vec, 2, - VIRTIO_MSI_CONFIG_VECTOR); - rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, - VIRTIO_MSI_CONFIG_VECTOR); + rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_CONFIG_VECTOR); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_CONFIG_VECTOR); + return dst; +} + +static uint16_t +legacy_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec) +{ + uint16_t dst; + + rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_pci_ioport_write(VTPCI_IO(hw), &vec, 2, VIRTIO_MSI_QUEUE_VECTOR); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_MSI_QUEUE_VECTOR); return dst; } @@ -224,9 +236,8 @@ legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { uint16_t dst; - rte_eal_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, - VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM); + rte_pci_ioport_write(VTPCI_IO(hw), &queue_id, 2, VIRTIO_PCI_QUEUE_SEL); + rte_pci_ioport_read(VTPCI_IO(hw), &dst, 2, VIRTIO_PCI_QUEUE_NUM); return dst; } @@ -238,10 +249,10 @@ legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) if (!check_vq_phys_addr_ok(vq)) return -1; - rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, - VIRTIO_PCI_QUEUE_SEL); + rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; - rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); return 0; } @@ -251,57 +262,16 @@ legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { uint32_t src = 0; - rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, - VIRTIO_PCI_QUEUE_SEL); - rte_eal_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); + rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_pci_ioport_write(VTPCI_IO(hw), &src, 4, VIRTIO_PCI_QUEUE_PFN); } static void legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) { - rte_eal_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, - VIRTIO_PCI_QUEUE_NOTIFY); -} - -#ifdef RTE_EXEC_ENV_LINUXAPP -static int -legacy_virtio_has_msix(const struct rte_pci_addr *loc) -{ - DIR *d; - char dirname[PATH_MAX]; - - snprintf(dirname, sizeof(dirname), - "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(), - loc->domain, loc->bus, loc->devid, loc->function); - - d = opendir(dirname); - if (d) - closedir(d); - - return d != NULL; -} -#else -static int -legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused) -{ - /* nic_uio does not enable interrupts, return 0 (false). */ - return 0; -} -#endif - -static int -legacy_virtio_resource_init(struct rte_pci_device *pci_dev, - struct virtio_hw *hw, uint32_t *dev_flags) -{ - if (rte_eal_pci_ioport_map(pci_dev, 0, VTPCI_IO(hw)) < 0) - return -1; - - if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) - *dev_flags |= RTE_ETH_DEV_INTR_LSC; - else - *dev_flags &= ~RTE_ETH_DEV_INTR_LSC; - - return 0; + rte_pci_ioport_write(VTPCI_IO(hw), &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_NOTIFY); } const struct virtio_pci_ops legacy_ops = { @@ -314,54 +284,18 @@ const struct virtio_pci_ops legacy_ops = { .set_features = legacy_set_features, .get_isr = legacy_get_isr, .set_config_irq = legacy_set_config_irq, + .set_queue_irq = legacy_set_queue_irq, .get_queue_num = legacy_get_queue_num, .setup_queue = legacy_setup_queue, .del_queue = legacy_del_queue, .notify_queue = legacy_notify_queue, }; - -static inline uint8_t -io_read8(uint8_t *addr) -{ - return *(volatile uint8_t *)addr; -} - -static inline void -io_write8(uint8_t val, uint8_t *addr) -{ - *(volatile uint8_t *)addr = val; -} - -static inline uint16_t -io_read16(uint16_t *addr) -{ - return *(volatile uint16_t *)addr; -} - -static inline void -io_write16(uint16_t val, uint16_t *addr) -{ - *(volatile uint16_t *)addr = val; -} - -static inline uint32_t -io_read32(uint32_t *addr) -{ - return *(volatile uint32_t *)addr; -} - -static inline void -io_write32(uint32_t val, uint32_t *addr) -{ - *(volatile uint32_t *)addr = val; -} - static inline void io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) { - io_write32(val & ((1ULL << 32) - 1), lo); - io_write32(val >> 32, hi); + rte_write32(val & ((1ULL << 32) - 1), lo); + rte_write32(val >> 32, hi); } static void @@ -373,13 +307,13 @@ modern_read_dev_config(struct virtio_hw *hw, size_t offset, uint8_t old_gen, new_gen; do { - old_gen = io_read8(&hw->common_cfg->config_generation); + old_gen = rte_read8(&hw->common_cfg->config_generation); p = dst; for (i = 0; i < length; i++) - *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i); + *p++ = rte_read8((uint8_t *)hw->dev_cfg + offset + i); - new_gen = io_read8(&hw->common_cfg->config_generation); + new_gen = rte_read8(&hw->common_cfg->config_generation); } while (old_gen != new_gen); } @@ -391,7 +325,7 @@ modern_write_dev_config(struct virtio_hw *hw, size_t offset, const uint8_t *p = src; for (i = 0; i < length; i++) - io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i); + rte_write8((*p++), (((uint8_t *)hw->dev_cfg) + offset + i)); } static uint64_t @@ -399,11 +333,11 @@ modern_get_features(struct virtio_hw *hw) { uint32_t features_lo, features_hi; - io_write32(0, &hw->common_cfg->device_feature_select); - features_lo = io_read32(&hw->common_cfg->device_feature); + rte_write32(0, &hw->common_cfg->device_feature_select); + features_lo = rte_read32(&hw->common_cfg->device_feature); - io_write32(1, &hw->common_cfg->device_feature_select); - features_hi = io_read32(&hw->common_cfg->device_feature); + rte_write32(1, &hw->common_cfg->device_feature_select); + features_hi = rte_read32(&hw->common_cfg->device_feature); return ((uint64_t)features_hi << 32) | features_lo; } @@ -411,25 +345,25 @@ modern_get_features(struct virtio_hw *hw) static void modern_set_features(struct virtio_hw *hw, uint64_t features) { - io_write32(0, &hw->common_cfg->guest_feature_select); - io_write32(features & ((1ULL << 32) - 1), - &hw->common_cfg->guest_feature); + rte_write32(0, &hw->common_cfg->guest_feature_select); + rte_write32(features & ((1ULL << 32) - 1), + &hw->common_cfg->guest_feature); - io_write32(1, &hw->common_cfg->guest_feature_select); - io_write32(features >> 32, - &hw->common_cfg->guest_feature); + rte_write32(1, &hw->common_cfg->guest_feature_select); + rte_write32(features >> 32, + &hw->common_cfg->guest_feature); } static uint8_t modern_get_status(struct virtio_hw *hw) { - return io_read8(&hw->common_cfg->device_status); + return rte_read8(&hw->common_cfg->device_status); } static void modern_set_status(struct virtio_hw *hw, uint8_t status) { - io_write8(status, &hw->common_cfg->device_status); + rte_write8(status, &hw->common_cfg->device_status); } static void @@ -442,21 +376,29 @@ modern_reset(struct virtio_hw *hw) static uint8_t modern_get_isr(struct virtio_hw *hw) { - return io_read8(hw->isr); + return rte_read8(hw->isr); } static uint16_t modern_set_config_irq(struct virtio_hw *hw, uint16_t vec) { - io_write16(vec, &hw->common_cfg->msix_config); - return io_read16(&hw->common_cfg->msix_config); + rte_write16(vec, &hw->common_cfg->msix_config); + return rte_read16(&hw->common_cfg->msix_config); +} + +static uint16_t +modern_set_queue_irq(struct virtio_hw *hw, struct virtqueue *vq, uint16_t vec) +{ + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vec, &hw->common_cfg->queue_msix_vector); + return rte_read16(&hw->common_cfg->queue_msix_vector); } static uint16_t modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) { - io_write16(queue_id, &hw->common_cfg->queue_select); - return io_read16(&hw->common_cfg->queue_size); + rte_write16(queue_id, &hw->common_cfg->queue_select); + return rte_read16(&hw->common_cfg->queue_size); } static int @@ -474,7 +416,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) ring[vq->vq_nentries]), VIRTIO_PCI_VRING_ALIGN); - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -483,11 +425,11 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - notify_off = io_read16(&hw->common_cfg->queue_notify_off); + notify_off = rte_read16(&hw->common_cfg->queue_notify_off); vq->notify_addr = (void *)((uint8_t *)hw->notify_base + notify_off * hw->notify_off_multiplier); - io_write16(1, &hw->common_cfg->queue_enable); + rte_write16(1, &hw->common_cfg->queue_enable); PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index); PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr); @@ -502,7 +444,7 @@ modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) static void modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) { - io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + rte_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, &hw->common_cfg->queue_desc_hi); @@ -511,13 +453,13 @@ modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) io_write64_twopart(0, &hw->common_cfg->queue_used_lo, &hw->common_cfg->queue_used_hi); - io_write16(0, &hw->common_cfg->queue_enable); + rte_write16(0, &hw->common_cfg->queue_enable); } static void modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) { - io_write16(1, vq->notify_addr); + rte_write16(vq->vq_queue_index, vq->notify_addr); } const struct virtio_pci_ops modern_ops = { @@ -530,6 +472,7 @@ const struct virtio_pci_ops modern_ops = { .set_features = modern_set_features, .get_isr = modern_get_isr, .set_config_irq = modern_set_config_irq, + .set_queue_irq = modern_set_queue_irq, .get_queue_num = modern_get_queue_num, .setup_queue = modern_setup_queue, .del_queue = modern_del_queue, @@ -601,14 +544,6 @@ vtpci_isr(struct virtio_hw *hw) return VTPCI_OPS(hw)->get_isr(hw); } - -/* Enable one vector (0) for Link State Intrerrupt */ -uint16_t -vtpci_irq_config(struct virtio_hw *hw, uint16_t vec) -{ - return VTPCI_OPS(hw)->set_config_irq(hw, vec); -} - static void * get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap) { @@ -651,25 +586,28 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw) struct virtio_pci_cap cap; int ret; - if (rte_eal_pci_map_device(dev)) { + if (rte_pci_map_device(dev)) { PMD_INIT_LOG(DEBUG, "failed to map pci device!"); return -1; } - ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); + ret = rte_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); if (ret < 0) { PMD_INIT_LOG(DEBUG, "failed to read pci capability list"); return -1; } while (pos) { - ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos); + ret = rte_pci_read_config(dev, &cap, sizeof(cap), pos); if (ret < 0) { PMD_INIT_LOG(ERR, "failed to read pci cap at pos: %x", pos); break; } + if (cap.cap_vndr == PCI_CAP_ID_MSIX) + hw->use_msix = 1; + if (cap.cap_vndr != PCI_CAP_ID_VNDR) { PMD_INIT_LOG(DEBUG, "[%2x] skipping non VNDR cap id: %02x", @@ -686,8 +624,8 @@ virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw) hw->common_cfg = get_cfg_addr(dev, &cap); break; case VIRTIO_PCI_CAP_NOTIFY_CFG: - rte_eal_pci_read_config(dev, &hw->notify_off_multiplier, - 4, pos + sizeof(cap)); + rte_pci_read_config(dev, &hw->notify_off_multiplier, + 4, pos + sizeof(cap)); hw->notify_base = get_cfg_addr(dev, &cap); break; case VIRTIO_PCI_CAP_DEVICE_CFG: @@ -728,11 +666,8 @@ next: * Return 0 on success. */ int -vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, - uint32_t *dev_flags) +vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) { - hw->dev = dev; - /* * Try if we can succeed reading virtio pci caps, which exists * only on modern pci device. If failed, we fallback to legacy @@ -742,12 +677,11 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, PMD_INIT_LOG(INFO, "modern virtio pci detected."); virtio_hw_internal[hw->port_id].vtpci_ops = &modern_ops; hw->modern = 1; - *dev_flags |= RTE_ETH_DEV_INTR_LSC; return 0; } PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); - if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { + if (rte_pci_ioport_map(dev, 0, VTPCI_IO(hw)) < 0) { if (dev->kdrv == RTE_KDRV_UNKNOWN && (!dev->device.devargs || dev->device.devargs->type != @@ -760,7 +694,6 @@ vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, } virtio_hw_internal[hw->port_id].vtpci_ops = &legacy_ops; - hw->use_msix = legacy_virtio_has_msix(&dev->addr); hw->modern = 0; return 0; diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h index 511a1c87..18caebdd 100644 --- a/drivers/net/virtio/virtio_pci.h +++ b/drivers/net/virtio/virtio_pci.h @@ -106,6 +106,7 @@ struct virtnet_ctl; /* The feature bitmap for virtio net */ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_MTU 3 /* Initial MTU advice. */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -160,7 +161,8 @@ struct virtnet_ctl; /* * Maximum number of virtqueues per device. */ -#define VIRTIO_MAX_VIRTQUEUES 8 +#define VIRTIO_MAX_VIRTQUEUE_PAIRS 8 +#define VIRTIO_MAX_VIRTQUEUES (VIRTIO_MAX_VIRTQUEUE_PAIRS * 2 + 1) /* Common configuration */ #define VIRTIO_PCI_CAP_COMMON_CFG 1 @@ -235,6 +237,9 @@ struct virtio_pci_ops { uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); + uint16_t (*set_queue_irq)(struct virtio_hw *hw, struct virtqueue *vq, + uint16_t vec); + uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); @@ -248,6 +253,8 @@ struct virtio_hw { uint64_t req_guest_features; uint64_t guest_features; uint32_t max_queue_pairs; + uint16_t started; + uint16_t max_mtu; uint16_t vtnet_hdr_size; uint8_t vlan_strip; uint8_t use_msix; @@ -258,7 +265,6 @@ struct virtio_hw { uint32_t notify_off_multiplier; uint8_t *isr; uint16_t *notify_base; - struct rte_pci_device *dev; struct virtio_pci_common_cfg *common_cfg; struct virtio_net_config *dev_cfg; void *virtio_user_dev; @@ -294,6 +300,7 @@ struct virtio_net_config { /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ uint16_t status; uint16_t max_virtqueue_pairs; + uint16_t mtu; } __attribute__((packed)); /* @@ -314,8 +321,7 @@ vtpci_with_feature(struct virtio_hw *hw, uint64_t bit) /* * Function declaration from virtio_pci.c */ -int vtpci_init(struct rte_pci_device *, struct virtio_hw *, - uint32_t *dev_flags); +int vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw); void vtpci_reset(struct virtio_hw *); void vtpci_reinit_complete(struct virtio_hw *); @@ -331,8 +337,6 @@ void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int); uint8_t vtpci_isr(struct virtio_hw *); -uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t); - extern const struct virtio_pci_ops legacy_ops; extern const struct virtio_pci_ops modern_ops; extern const struct virtio_pci_ops virtio_user_ops; diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c index a33ef1a8..fbc96dfb 100644 --- a/drivers/net/virtio/virtio_rxtx.c +++ b/drivers/net/virtio/virtio_rxtx.c @@ -72,6 +72,15 @@ #define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ ETH_TXQ_FLAGS_NOOFFLOADS) +int +virtio_dev_rx_queue_done(void *rxq, uint16_t offset) +{ + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq = rxvq->vq; + + return VIRTQUEUE_NUSED(vq) >= offset; +} + static void vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) { @@ -124,7 +133,7 @@ virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; if (unlikely(cookie == NULL)) { - PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u", vq->vq_used_cons_idx); break; } @@ -716,7 +725,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) { struct virtnet_rx *rxvq = rx_queue; struct virtqueue *vq = rxvq->vq; - struct virtio_hw *hw; + struct virtio_hw *hw = vq->hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; uint32_t len[VIRTIO_MBUF_BURST_SZ]; @@ -727,6 +736,10 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) int offload; struct virtio_net_hdr *hdr; + nb_rx = 0; + if (unlikely(hw->started == 0)) + return nb_rx; + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); @@ -739,8 +752,6 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); - hw = vq->hw; - nb_rx = 0; nb_enqueued = 0; hdr_size = hw->vtnet_hdr_size; offload = rx_offload_enabled(hw); @@ -763,8 +774,6 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rxm->ol_flags = 0; rxm->vlan_tci = 0; - rxm->nb_segs = 1; - rxm->next = NULL; rxm->pkt_len = (uint32_t)(len[i] - hdr_size); rxm->data_len = (uint16_t)(len[i] - hdr_size); @@ -784,7 +793,7 @@ virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) rx_pkts[nb_rx++] = rxm; - rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len; + rxvq->stats.bytes += rxm->pkt_len; virtio_update_packet_stats(&rxvq->stats, rxm); } @@ -827,7 +836,7 @@ virtio_recv_mergeable_pkts(void *rx_queue, { struct virtnet_rx *rxvq = rx_queue; struct virtqueue *vq = rxvq->vq; - struct virtio_hw *hw; + struct virtio_hw *hw = vq->hw; struct rte_mbuf *rxm, *new_mbuf; uint16_t nb_used, num, nb_rx; uint32_t len[VIRTIO_MBUF_BURST_SZ]; @@ -841,14 +850,16 @@ virtio_recv_mergeable_pkts(void *rx_queue, uint32_t hdr_size; int offload; + nb_rx = 0; + if (unlikely(hw->started == 0)) + return nb_rx; + nb_used = VIRTQUEUE_NUSED(vq); virtio_rmb(); PMD_RX_LOG(DEBUG, "used:%d", nb_used); - hw = vq->hw; - nb_rx = 0; i = 0; nb_enqueued = 0; seg_num = 0; @@ -891,7 +902,6 @@ virtio_recv_mergeable_pkts(void *rx_queue, rxm->data_off = RTE_PKTMBUF_HEADROOM; rxm->nb_segs = seg_num; - rxm->next = NULL; rxm->ol_flags = 0; rxm->vlan_tci = 0; rxm->pkt_len = (uint32_t)(len[0] - hdr_size); @@ -936,7 +946,6 @@ virtio_recv_mergeable_pkts(void *rx_queue, rxm = rcv_pkts[extra_idx]; rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; - rxm->next = NULL; rxm->pkt_len = (uint32_t)(len[extra_idx]); rxm->data_len = (uint16_t)(len[extra_idx]); @@ -1000,9 +1009,12 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) struct virtqueue *vq = txvq->vq; struct virtio_hw *hw = vq->hw; uint16_t hdr_size = hw->vtnet_hdr_size; - uint16_t nb_used, nb_tx; + uint16_t nb_used, nb_tx = 0; int error; + if (unlikely(hw->started == 0)) + return nb_tx; + if (unlikely(nb_pkts < 1)) return nb_pkts; @@ -1027,7 +1039,8 @@ virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) } /* optimize ring usage */ - if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && + if ((vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) || + vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) && rte_mbuf_refcnt_read(txm) == 1 && RTE_MBUF_DIRECT(txm) && txm->nb_segs == 1 && diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c index b651e53b..542cf805 100644 --- a/drivers/net/virtio/virtio_rxtx_simple.c +++ b/drivers/net/virtio/virtio_rxtx_simple.c @@ -89,12 +89,17 @@ virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, { struct virtnet_tx *txvq = tx_queue; struct virtqueue *vq = txvq->vq; + struct virtio_hw *hw = vq->hw; uint16_t nb_used; uint16_t desc_idx; struct vring_desc *start_dp; uint16_t nb_tail, nb_commit; int i; uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1; + uint16_t nb_tx = 0; + + if (unlikely(hw->started == 0)) + return nb_tx; nb_used = VIRTQUEUE_NUSED(vq); rte_compiler_barrier(); diff --git a/drivers/net/virtio/virtio_rxtx_simple.h b/drivers/net/virtio/virtio_rxtx_simple.h index b08f8594..f531c542 100644 --- a/drivers/net/virtio/virtio_rxtx_simple.h +++ b/drivers/net/virtio/virtio_rxtx_simple.h @@ -98,13 +98,13 @@ virtio_xmit_cleanup(struct virtqueue *vq) desc_idx = (uint16_t)(vq->vq_used_cons_idx & ((vq->vq_nentries >> 1) - 1)); m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); + m = rte_pktmbuf_prefree_seg(m); if (likely(m != NULL)) { free[0] = m; nb_free = 1; for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); + m = rte_pktmbuf_prefree_seg(m); if (likely(m != NULL)) { if (likely(m->pool == free[0]->pool)) free[nb_free++] = m; @@ -123,7 +123,7 @@ virtio_xmit_cleanup(struct virtqueue *vq) } else { for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; - m = __rte_pktmbuf_prefree_seg(m); + m = rte_pktmbuf_prefree_seg(m); if (m != NULL) rte_mempool_put(m->pool, m); } diff --git a/drivers/net/virtio/virtio_rxtx_simple_neon.c b/drivers/net/virtio/virtio_rxtx_simple_neon.c index 793eefbe..ecc62ada 100644 --- a/drivers/net/virtio/virtio_rxtx_simple_neon.c +++ b/drivers/net/virtio/virtio_rxtx_simple_neon.c @@ -72,12 +72,13 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, { struct virtnet_rx *rxvq = rx_queue; struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw = vq->hw; uint16_t nb_used; uint16_t desc_idx; struct vring_used_elem *rused; struct rte_mbuf **sw_ring; struct rte_mbuf **sw_ring_end; - uint16_t nb_pkts_received; + uint16_t nb_pkts_received = 0; uint8x16_t shuf_msk1 = { 0xFF, 0xFF, 0xFF, 0xFF, /* packet type */ @@ -106,6 +107,9 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, 0, 0 }; + if (unlikely(hw->started == 0)) + return nb_pkts_received; + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) return 0; diff --git a/drivers/net/virtio/virtio_rxtx_simple_sse.c b/drivers/net/virtio/virtio_rxtx_simple_sse.c index 87bb5c63..7cf0f8b8 100644 --- a/drivers/net/virtio/virtio_rxtx_simple_sse.c +++ b/drivers/net/virtio/virtio_rxtx_simple_sse.c @@ -74,12 +74,13 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, { struct virtnet_rx *rxvq = rx_queue; struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw = vq->hw; uint16_t nb_used; uint16_t desc_idx; struct vring_used_elem *rused; struct rte_mbuf **sw_ring; struct rte_mbuf **sw_ring_end; - uint16_t nb_pkts_received; + uint16_t nb_pkts_received = 0; __m128i shuf_msk1, shuf_msk2, len_adjust; shuf_msk1 = _mm_set_epi8( @@ -109,6 +110,9 @@ virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, 0, (uint16_t)-vq->hw->vtnet_hdr_size, 0, 0); + if (unlikely(hw->started == 0)) + return nb_pkts_received; + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) return 0; diff --git a/drivers/net/virtio/virtio_user/vhost.h b/drivers/net/virtio/virtio_user/vhost.h index 7adb55f5..5c983bd4 100644 --- a/drivers/net/virtio/virtio_user/vhost.h +++ b/drivers/net/virtio/virtio_user/vhost.h @@ -42,8 +42,6 @@ #include "../virtio_logs.h" #include "../virtqueue.h" -#define VHOST_MEMORY_MAX_NREGIONS 8 - struct vhost_vring_state { unsigned int index; unsigned int num; @@ -98,6 +96,8 @@ enum vhost_user_request { VHOST_USER_MAX }; +const char * const vhost_msg_strings[VHOST_USER_MAX]; + struct vhost_memory_region { uint64_t guest_phys_addr; uint64_t memory_size; /* bytes */ @@ -105,42 +105,19 @@ struct vhost_memory_region { uint64_t mmap_offset; }; -struct vhost_memory { - uint32_t nregions; - uint32_t padding; - struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; -}; - -struct vhost_user_msg { - enum vhost_user_request request; +struct virtio_user_dev; -#define VHOST_USER_VERSION_MASK 0x3 -#define VHOST_USER_REPLY_MASK (0x1 << 2) - uint32_t flags; - uint32_t size; /* the following payload size */ - union { -#define VHOST_USER_VRING_IDX_MASK 0xff -#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) - uint64_t u64; - struct vhost_vring_state state; - struct vhost_vring_addr addr; - struct vhost_memory memory; - } payload; - int fds[VHOST_MEMORY_MAX_NREGIONS]; -} __attribute((packed)); - -#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) -#define VHOST_USER_PAYLOAD_SIZE \ - (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) - -/* The version of the protocol we support */ -#define VHOST_USER_VERSION 0x1 - -#define VHOST_USER_F_PROTOCOL_FEATURES 30 -#define VHOST_USER_MQ (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) +struct virtio_user_backend_ops { + int (*setup)(struct virtio_user_dev *dev); + int (*send_request)(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg); + int (*enable_qp)(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable); +}; -int vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg); -int vhost_user_setup(const char *path); -int vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable); +struct virtio_user_backend_ops ops_user; +struct virtio_user_backend_ops ops_kernel; #endif diff --git a/drivers/net/virtio/virtio_user/vhost_kernel.c b/drivers/net/virtio/virtio_user/vhost_kernel.c new file mode 100644 index 00000000..68d28b13 --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_kernel.c @@ -0,0 +1,403 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> + +#include <rte_memory.h> +#include <rte_eal_memconfig.h> + +#include "vhost.h" +#include "virtio_user_dev.h" +#include "vhost_kernel_tap.h" + +struct vhost_memory_kernel { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[0]; +}; + +/* vhost kernel ioctls */ +#define VHOST_VIRTIO 0xAF +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory_kernel) +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +static uint64_t max_regions = 64; + +static void +get_vhost_kernel_max_regions(void) +{ + int fd; + char buf[20] = {'\0'}; + + fd = open("/sys/module/vhost/parameters/max_mem_regions", O_RDONLY); + if (fd < 0) + return; + + if (read(fd, buf, sizeof(buf) - 1) > 0) + max_regions = strtoull(buf, NULL, 10); + + close(fd); +} + +static uint64_t vhost_req_user_to_kernel[] = { + [VHOST_USER_SET_OWNER] = VHOST_SET_OWNER, + [VHOST_USER_RESET_OWNER] = VHOST_RESET_OWNER, + [VHOST_USER_SET_FEATURES] = VHOST_SET_FEATURES, + [VHOST_USER_GET_FEATURES] = VHOST_GET_FEATURES, + [VHOST_USER_SET_VRING_CALL] = VHOST_SET_VRING_CALL, + [VHOST_USER_SET_VRING_NUM] = VHOST_SET_VRING_NUM, + [VHOST_USER_SET_VRING_BASE] = VHOST_SET_VRING_BASE, + [VHOST_USER_GET_VRING_BASE] = VHOST_GET_VRING_BASE, + [VHOST_USER_SET_VRING_ADDR] = VHOST_SET_VRING_ADDR, + [VHOST_USER_SET_VRING_KICK] = VHOST_SET_VRING_KICK, + [VHOST_USER_SET_MEM_TABLE] = VHOST_SET_MEM_TABLE, +}; + +/* By default, vhost kernel module allows 64 regions, but DPDK allows + * 256 segments. As a relief, below function merges those virtually + * adjacent memsegs into one region. + */ +static struct vhost_memory_kernel * +prepare_vhost_memory_kernel(void) +{ + uint32_t i, j, k = 0; + struct rte_memseg *seg; + struct vhost_memory_region *mr; + struct vhost_memory_kernel *vm; + + vm = malloc(sizeof(struct vhost_memory_kernel) + + max_regions * + sizeof(struct vhost_memory_region)); + if (!vm) + return NULL; + + for (i = 0; i < RTE_MAX_MEMSEG; ++i) { + seg = &rte_eal_get_configuration()->mem_config->memseg[i]; + if (!seg->addr) + break; + + int new_region = 1; + + for (j = 0; j < k; ++j) { + mr = &vm->regions[j]; + + if (mr->userspace_addr + mr->memory_size == + (uint64_t)(uintptr_t)seg->addr) { + mr->memory_size += seg->len; + new_region = 0; + break; + } + + if ((uint64_t)(uintptr_t)seg->addr + seg->len == + mr->userspace_addr) { + mr->guest_phys_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = + (uint64_t)(uintptr_t)seg->addr; + mr->memory_size += seg->len; + new_region = 0; + break; + } + } + + if (new_region == 0) + continue; + + mr = &vm->regions[k++]; + /* use vaddr here! */ + mr->guest_phys_addr = (uint64_t)(uintptr_t)seg->addr; + mr->userspace_addr = (uint64_t)(uintptr_t)seg->addr; + mr->memory_size = seg->len; + mr->mmap_offset = 0; + + if (k >= max_regions) { + free(vm); + return NULL; + } + } + + vm->nregions = k; + vm->padding = 0; + return vm; +} + +/* with below features, vhost kernel does not need to do the checksum and TSO, + * these info will be passed to virtio_user through virtio net header. + */ +#define VHOST_KERNEL_GUEST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6) | \ + (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ + (1ULL << VIRTIO_NET_F_GUEST_UFO)) + +/* with below features, when flows from virtio_user to vhost kernel + * (1) if flows goes up through the kernel networking stack, it does not need + * to verify checksum, which can save CPU cycles; + * (2) if flows goes through a Linux bridge and outside from an interface + * (kernel driver), checksum and TSO will be done by GSO in kernel or even + * offloaded into real physical device. + */ +#define VHOST_KERNEL_HOST_OFFLOADS_MASK \ + ((1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM)) + +static int +tap_supporte_mq(void) +{ + int tapfd; + unsigned int tap_features; + + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + close(tapfd); + return -1; + } + + close(tapfd); + return tap_features & IFF_MULTI_QUEUE; +} + +static int +vhost_kernel_ioctl(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) +{ + int ret = -1; + unsigned int i; + uint64_t req_kernel; + struct vhost_memory_kernel *vm = NULL; + int vhostfd; + unsigned int queue_sel; + + PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); + + req_kernel = vhost_req_user_to_kernel[req]; + + if (req_kernel == VHOST_SET_MEM_TABLE) { + vm = prepare_vhost_memory_kernel(); + if (!vm) + return -1; + arg = (void *)vm; + } + + if (req_kernel == VHOST_SET_FEATURES) { + /* We don't need memory protection here */ + *(uint64_t *)arg &= ~(1ULL << VIRTIO_F_IOMMU_PLATFORM); + + /* VHOST kernel does not know about below flags */ + *(uint64_t *)arg &= ~VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *(uint64_t *)arg &= ~VHOST_KERNEL_HOST_OFFLOADS_MASK; + + *(uint64_t *)arg &= ~(1ULL << VIRTIO_NET_F_MQ); + } + + switch (req_kernel) { + case VHOST_SET_VRING_NUM: + case VHOST_SET_VRING_ADDR: + case VHOST_SET_VRING_BASE: + case VHOST_GET_VRING_BASE: + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + queue_sel = *(unsigned int *)arg; + vhostfd = dev->vhostfds[queue_sel / 2]; + *(unsigned int *)arg = queue_sel % 2; + PMD_DRV_LOG(DEBUG, "vhostfd=%d, index=%u", + vhostfd, *(unsigned int *)arg); + break; + default: + vhostfd = -1; + } + if (vhostfd == -1) { + for (i = 0; i < dev->max_queue_pairs; ++i) { + if (dev->vhostfds[i] < 0) + continue; + + ret = ioctl(dev->vhostfds[i], req_kernel, arg); + if (ret < 0) + break; + } + } else { + ret = ioctl(vhostfd, req_kernel, arg); + } + + if (!ret && req_kernel == VHOST_GET_FEATURES) { + /* with tap as the backend, all these features are supported + * but not claimed by vhost-net, so we add them back when + * reporting to upper layer. + */ + *((uint64_t *)arg) |= VHOST_KERNEL_GUEST_OFFLOADS_MASK; + *((uint64_t *)arg) |= VHOST_KERNEL_HOST_OFFLOADS_MASK; + + /* vhost_kernel will not declare this feature, but it does + * support multi-queue. + */ + if (tap_supporte_mq()) + *(uint64_t *)arg |= (1ull << VIRTIO_NET_F_MQ); + } + + if (vm) + free(vm); + + if (ret < 0) + PMD_DRV_LOG(ERR, "%s failed: %s", + vhost_msg_strings[req], strerror(errno)); + + return ret; +} + +/** + * Set up environment to talk with a vhost kernel backend. + * + * @return + * - (-1) if fail to set up; + * - (>=0) if successful. + */ +static int +vhost_kernel_setup(struct virtio_user_dev *dev) +{ + int vhostfd; + uint32_t i; + + get_vhost_kernel_max_regions(); + + for (i = 0; i < dev->max_queue_pairs; ++i) { + vhostfd = open(dev->path, O_RDWR); + if (vhostfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s, %s", + dev->path, strerror(errno)); + return -1; + } + + dev->vhostfds[i] = vhostfd; + } + + return 0; +} + +static int +vhost_kernel_set_backend(int vhostfd, int tapfd) +{ + struct vhost_vring_file f; + + f.fd = tapfd; + f.index = 0; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + f.index = 1; + if (ioctl(vhostfd, VHOST_NET_SET_BACKEND, &f) < 0) { + PMD_DRV_LOG(ERR, "VHOST_NET_SET_BACKEND fails, %s", + strerror(errno)); + return -1; + } + + return 0; +} + +static int +vhost_kernel_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) +{ + int hdr_size; + int vhostfd; + int tapfd; + int req_mq = (dev->max_queue_pairs > 1); + + vhostfd = dev->vhostfds[pair_idx]; + + if (!enable) { + if (dev->tapfds[pair_idx] >= 0) { + close(dev->tapfds[pair_idx]); + dev->tapfds[pair_idx] = -1; + } + return vhost_kernel_set_backend(vhostfd, -1); + } else if (dev->tapfds[pair_idx] >= 0) { + return 0; + } + + if ((dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF)) || + (dev->features & (1ULL << VIRTIO_F_VERSION_1))) + hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hdr_size = sizeof(struct virtio_net_hdr); + + tapfd = vhost_kernel_open_tap(&dev->ifname, hdr_size, req_mq); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open tap for vhost kernel"); + return -1; + } + + if (vhost_kernel_set_backend(vhostfd, tapfd) < 0) { + PMD_DRV_LOG(ERR, "fail to set backend for vhost kernel"); + close(tapfd); + return -1; + } + + dev->tapfds[pair_idx] = tapfd; + return 0; +} + +struct virtio_user_backend_ops ops_kernel = { + .setup = vhost_kernel_setup, + .send_request = vhost_kernel_ioctl, + .enable_qp = vhost_kernel_enable_queue_pair +}; diff --git a/drivers/net/virtio/virtio_user/vhost_kernel_tap.c b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c new file mode 100644 index 00000000..f585de8c --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_kernel_tap.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <net/if.h> +#include <errno.h> +#include <string.h> +#include <limits.h> + +#include "vhost_kernel_tap.h" +#include "../virtio_logs.h" + +int +vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq) +{ + unsigned int tap_features; + int sndbuf = INT_MAX; + struct ifreq ifr; + int tapfd; + unsigned int offload = + TUN_F_CSUM | + TUN_F_TSO4 | + TUN_F_TSO6 | + TUN_F_TSO_ECN | + TUN_F_UFO; + + /* TODO: + * 1. verify we can get/set vnet_hdr_len, tap_probe_vnet_hdr_len + * 2. get number of memory regions from vhost module parameter + * max_mem_regions, supported in newer version linux kernel + */ + tapfd = open(PATH_NET_TUN, O_RDWR); + if (tapfd < 0) { + PMD_DRV_LOG(ERR, "fail to open %s: %s", + PATH_NET_TUN, strerror(errno)); + return -1; + } + + /* Construct ifr */ + memset(&ifr, 0, sizeof(ifr)); + ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + + if (ioctl(tapfd, TUNGETFEATURES, &tap_features) == -1) { + PMD_DRV_LOG(ERR, "TUNGETFEATURES failed: %s", strerror(errno)); + goto error; + } + if (tap_features & IFF_ONE_QUEUE) + ifr.ifr_flags |= IFF_ONE_QUEUE; + + /* Let tap instead of vhost-net handle vnet header, as the latter does + * not support offloading. And in this case, we should not set feature + * bit VHOST_NET_F_VIRTIO_NET_HDR. + */ + if (tap_features & IFF_VNET_HDR) { + ifr.ifr_flags |= IFF_VNET_HDR; + } else { + PMD_DRV_LOG(ERR, "TAP does not support IFF_VNET_HDR"); + goto error; + } + + if (req_mq) + ifr.ifr_flags |= IFF_MULTI_QUEUE; + + if (*p_ifname) + strncpy(ifr.ifr_name, *p_ifname, IFNAMSIZ); + else + strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ); + if (ioctl(tapfd, TUNSETIFF, (void *)&ifr) == -1) { + PMD_DRV_LOG(ERR, "TUNSETIFF failed: %s", strerror(errno)); + goto error; + } + + fcntl(tapfd, F_SETFL, O_NONBLOCK); + + if (ioctl(tapfd, TUNSETVNETHDRSZ, &hdr_size) < 0) { + PMD_DRV_LOG(ERR, "TUNSETVNETHDRSZ failed: %s", strerror(errno)); + goto error; + } + + if (ioctl(tapfd, TUNSETSNDBUF, &sndbuf) < 0) { + PMD_DRV_LOG(ERR, "TUNSETSNDBUF failed: %s", strerror(errno)); + goto error; + } + + /* TODO: before set the offload capabilities, we'd better (1) check + * negotiated features to see if necessary to offload; (2) query tap + * to see if it supports the offload capabilities. + */ + if (ioctl(tapfd, TUNSETOFFLOAD, offload) != 0) + PMD_DRV_LOG(ERR, "TUNSETOFFLOAD ioctl() failed: %s", + strerror(errno)); + + if (!(*p_ifname)) + *p_ifname = strdup(ifr.ifr_name); + + return tapfd; +error: + close(tapfd); + return -1; +} diff --git a/drivers/net/virtio/virtio_user/vhost_kernel_tap.h b/drivers/net/virtio/virtio_user/vhost_kernel_tap.h new file mode 100644 index 00000000..eae340cc --- /dev/null +++ b/drivers/net/virtio/virtio_user/vhost_kernel_tap.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/ioctl.h> + +/* TUN ioctls */ +#define TUNSETIFF _IOW('T', 202, int) +#define TUNGETFEATURES _IOR('T', 207, unsigned int) +#define TUNSETOFFLOAD _IOW('T', 208, unsigned int) +#define TUNGETIFF _IOR('T', 210, unsigned int) +#define TUNSETSNDBUF _IOW('T', 212, int) +#define TUNGETVNETHDRSZ _IOR('T', 215, int) +#define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) +#define TUNSETVNETLE _IOW('T', 220, int) +#define TUNSETVNETBE _IOW('T', 222, int) + +/* TUNSETIFF ifr flags */ +#define IFF_TAP 0x0002 +#define IFF_NO_PI 0x1000 +#define IFF_ONE_QUEUE 0x2000 +#define IFF_VNET_HDR 0x4000 +#define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 + +/* Features for GSO (TUNSETOFFLOAD). */ +#define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ +#define TUN_F_TSO4 0x02 /* I can handle TSO for IPv4 packets */ +#define TUN_F_TSO6 0x04 /* I can handle TSO for IPv6 packets */ +#define TUN_F_TSO_ECN 0x08 /* I can handle TSO with ECN bits. */ +#define TUN_F_UFO 0x10 /* I can handle UFO packets */ + +/* Constants */ +#define PATH_NET_TUN "/dev/net/tun" + +int vhost_kernel_open_tap(char **p_ifname, int hdr_size, int req_mq); diff --git a/drivers/net/virtio/virtio_user/vhost_user.c b/drivers/net/virtio/virtio_user/vhost_user.c index 082e8217..4ad7b21b 100644 --- a/drivers/net/virtio/virtio_user/vhost_user.c +++ b/drivers/net/virtio/virtio_user/vhost_user.c @@ -41,6 +41,39 @@ #include <errno.h> #include "vhost.h" +#include "virtio_user_dev.h" + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +#define VHOST_MEMORY_MAX_NREGIONS 8 +struct vhost_memory { + uint32_t nregions; + uint32_t padding; + struct vhost_memory_region regions[VHOST_MEMORY_MAX_NREGIONS]; +}; + +struct vhost_user_msg { + enum vhost_user_request request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + struct vhost_memory memory; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)); + +#define VHOST_USER_HDR_SIZE offsetof(struct vhost_user_msg, payload.u64) +#define VHOST_USER_PAYLOAD_SIZE \ + (sizeof(struct vhost_user_msg) - VHOST_USER_HDR_SIZE) static int vhost_user_write(int fd, void *buf, int len, int *fds, int fd_num) @@ -223,24 +256,25 @@ prepare_vhost_memory_user(struct vhost_user_msg *msg, int fds[]) static struct vhost_user_msg m; -static const char * const vhost_msg_strings[] = { - [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", - [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", - [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", - [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", - [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", - [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", - [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", - [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", - [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", - [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", - [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", - [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", - NULL, +const char * const vhost_msg_strings[] = { + [VHOST_USER_SET_OWNER] = "VHOST_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_RESET_OWNER", + [VHOST_USER_SET_FEATURES] = "VHOST_SET_FEATURES", + [VHOST_USER_GET_FEATURES] = "VHOST_GET_FEATURES", + [VHOST_USER_SET_VRING_CALL] = "VHOST_SET_VRING_CALL", + [VHOST_USER_SET_VRING_NUM] = "VHOST_SET_VRING_NUM", + [VHOST_USER_SET_VRING_BASE] = "VHOST_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_GET_VRING_BASE", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_KICK] = "VHOST_SET_VRING_KICK", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_SET_MEM_TABLE", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_SET_VRING_ENABLE", }; -int -vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) +static int +vhost_user_sock(struct virtio_user_dev *dev, + enum vhost_user_request req, + void *arg) { struct vhost_user_msg msg; struct vhost_vring_file *file = 0; @@ -248,9 +282,9 @@ vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) int fds[VHOST_MEMORY_MAX_NREGIONS]; int fd_num = 0; int i, len; + int vhostfd = dev->vhostfd; RTE_SET_USED(m); - RTE_SET_USED(vhost_msg_strings); PMD_DRV_LOG(INFO, "%s", vhost_msg_strings[req]); @@ -371,15 +405,13 @@ vhost_user_sock(int vhostfd, enum vhost_user_request req, void *arg) /** * Set up environment to talk with a vhost user backend. - * @param path - * - The path to vhost user unix socket file. * * @return - * - (-1) if fail to set up; - * - (>=0) if successful, and it is the fd to vhostfd. + * - (-1) if fail; + * - (0) if succeed. */ -int -vhost_user_setup(const char *path) +static int +vhost_user_setup(struct virtio_user_dev *dev) { int fd; int flag; @@ -397,18 +429,21 @@ vhost_user_setup(const char *path) memset(&un, 0, sizeof(un)); un.sun_family = AF_UNIX; - snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + snprintf(un.sun_path, sizeof(un.sun_path), "%s", dev->path); if (connect(fd, (struct sockaddr *)&un, sizeof(un)) < 0) { PMD_DRV_LOG(ERR, "connect error, %s", strerror(errno)); close(fd); return -1; } - return fd; + dev->vhostfd = fd; + return 0; } -int -vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable) +static int +vhost_user_enable_queue_pair(struct virtio_user_dev *dev, + uint16_t pair_idx, + int enable) { int i; @@ -418,10 +453,15 @@ vhost_user_enable_queue_pair(int vhostfd, uint16_t pair_idx, int enable) .num = enable, }; - if (vhost_user_sock(vhostfd, - VHOST_USER_SET_VRING_ENABLE, &state)) + if (vhost_user_sock(dev, VHOST_USER_SET_VRING_ENABLE, &state)) return -1; } return 0; } + +struct virtio_user_backend_ops ops_user = { + .setup = vhost_user_setup, + .send_request = vhost_user_sock, + .enable_qp = vhost_user_enable_queue_pair +}; diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.c b/drivers/net/virtio/virtio_user/virtio_user_dev.c index a38398b8..450404ba 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.c +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.c @@ -39,6 +39,9 @@ #include <sys/mman.h> #include <unistd.h> #include <sys/eventfd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> #include "vhost.h" #include "virtio_user_dev.h" @@ -51,21 +54,11 @@ virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) * firstly because vhost depends on this msg to allocate virtqueue * pair. */ - int callfd; struct vhost_vring_file file; - /* May use invalid flag, but some backend leverages kickfd and callfd as - * criteria to judge if dev is alive. so finally we use real event_fd. - */ - callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); - if (callfd < 0) { - PMD_DRV_LOG(ERR, "callfd error, %s\n", strerror(errno)); - return -1; - } file.index = queue_sel; - file.fd = callfd; - vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_CALL, &file); - dev->callfds[queue_sel] = callfd; + file.fd = dev->callfds[queue_sel]; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_CALL, &file); return 0; } @@ -73,7 +66,6 @@ virtio_user_create_queue(struct virtio_user_dev *dev, uint32_t queue_sel) static int virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) { - int kickfd; struct vhost_vring_file file; struct vhost_vring_state state; struct vring *vring = &dev->vrings[queue_sel]; @@ -88,26 +80,21 @@ virtio_user_kick_queue(struct virtio_user_dev *dev, uint32_t queue_sel) state.index = queue_sel; state.num = vring->num; - vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_NUM, &state); + dev->ops->send_request(dev, VHOST_USER_SET_VRING_NUM, &state); + state.index = queue_sel; state.num = 0; /* no reservation */ - vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_BASE, &state); + dev->ops->send_request(dev, VHOST_USER_SET_VRING_BASE, &state); - vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_ADDR, &addr); + dev->ops->send_request(dev, VHOST_USER_SET_VRING_ADDR, &addr); /* Of all per virtqueue MSGs, make sure VHOST_USER_SET_VRING_KICK comes * lastly because vhost depends on this msg to judge if * virtio is ready. */ - kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); - if (kickfd < 0) { - PMD_DRV_LOG(ERR, "kickfd error, %s\n", strerror(errno)); - return -1; - } file.index = queue_sel; - file.fd = kickfd; - vhost_user_sock(dev->vhostfd, VHOST_USER_SET_VRING_KICK, &file); - dev->kickfds[queue_sel] = kickfd; + file.fd = dev->kickfds[queue_sel]; + dev->ops->send_request(dev, VHOST_USER_SET_VRING_KICK, &file); return 0; } @@ -146,22 +133,20 @@ virtio_user_start_device(struct virtio_user_dev *dev) if (virtio_user_queue_setup(dev, virtio_user_create_queue) < 0) goto error; - /* Step 1: set features - * Make sure VHOST_USER_F_PROTOCOL_FEATURES is added if mq is enabled, - * VIRTIO_NET_F_MAC and VIRTIO_NET_F_CTRL_VQ is stripped. - */ + /* Step 1: set features */ features = dev->features; - if (dev->max_queue_pairs > 1) - features |= VHOST_USER_MQ; + /* Strip VIRTIO_NET_F_MAC, as MAC address is handled in vdev init */ features &= ~(1ull << VIRTIO_NET_F_MAC); + /* Strip VIRTIO_NET_F_CTRL_VQ, as devices do not really need to know */ features &= ~(1ull << VIRTIO_NET_F_CTRL_VQ); - ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_FEATURES, &features); + features &= ~(1ull << VIRTIO_NET_F_STATUS); + ret = dev->ops->send_request(dev, VHOST_USER_SET_FEATURES, &features); if (ret < 0) goto error; PMD_DRV_LOG(INFO, "set features: %" PRIx64, features); /* Step 2: share memory regions */ - ret = vhost_user_sock(dev->vhostfd, VHOST_USER_SET_MEM_TABLE, NULL); + ret = dev->ops->send_request(dev, VHOST_USER_SET_MEM_TABLE, NULL); if (ret < 0) goto error; @@ -172,7 +157,7 @@ virtio_user_start_device(struct virtio_user_dev *dev) /* Step 4: enable queues * we enable the 1st queue pair by default. */ - vhost_user_enable_queue_pair(dev->vhostfd, 0, 1); + dev->ops->enable_qp(dev, 0, 1); return 0; error: @@ -184,13 +169,8 @@ int virtio_user_stop_device(struct virtio_user_dev *dev) { uint32_t i; - for (i = 0; i < dev->max_queue_pairs * 2; ++i) { - close(dev->callfds[i]); - close(dev->kickfds[i]); - } - for (i = 0; i < dev->max_queue_pairs; ++i) - vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + dev->ops->enable_qp(dev, i, 0); return 0; } @@ -217,35 +197,170 @@ parse_mac(struct virtio_user_dev *dev, const char *mac) } int -virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, - int cq, int queue_size, const char *mac) +is_vhost_user_by_type(const char *path) +{ + struct stat sb; + + if (stat(path, &sb) == -1) + return 0; + + return S_ISSOCK(sb.st_mode); +} + +static int +virtio_user_dev_init_notify(struct virtio_user_dev *dev) +{ + uint32_t i, j; + int callfd; + int kickfd; + + for (i = 0; i < VIRTIO_MAX_VIRTQUEUES; ++i) { + if (i >= dev->max_queue_pairs * 2) { + dev->kickfds[i] = -1; + dev->callfds[i] = -1; + continue; + } + + /* May use invalid flag, but some backend uses kickfd and + * callfd as criteria to judge if dev is alive. so finally we + * use real event_fd. + */ + callfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (callfd < 0) { + PMD_DRV_LOG(ERR, "callfd error, %s", strerror(errno)); + break; + } + kickfd = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); + if (kickfd < 0) { + PMD_DRV_LOG(ERR, "kickfd error, %s", strerror(errno)); + break; + } + dev->callfds[i] = callfd; + dev->kickfds[i] = kickfd; + } + + if (i < VIRTIO_MAX_VIRTQUEUES) { + for (j = 0; j <= i; ++j) { + close(dev->callfds[j]); + close(dev->kickfds[j]); + } + + return -1; + } + + return 0; +} + +static int +virtio_user_fill_intr_handle(struct virtio_user_dev *dev) { uint32_t i; + struct rte_eth_dev *eth_dev = &rte_eth_devices[dev->port_id]; + if (!eth_dev->intr_handle) { + eth_dev->intr_handle = malloc(sizeof(*eth_dev->intr_handle)); + if (!eth_dev->intr_handle) { + PMD_DRV_LOG(ERR, "fail to allocate intr_handle"); + return -1; + } + memset(eth_dev->intr_handle, 0, sizeof(*eth_dev->intr_handle)); + } + + for (i = 0; i < dev->max_queue_pairs; ++i) + eth_dev->intr_handle->efds[i] = dev->callfds[i]; + eth_dev->intr_handle->nb_efd = dev->max_queue_pairs; + eth_dev->intr_handle->max_intr = dev->max_queue_pairs + 1; + eth_dev->intr_handle->type = RTE_INTR_HANDLE_VDEV; + if (dev->vhostfd >= 0) + eth_dev->intr_handle->fd = dev->vhostfd; + + return 0; +} + +static int +virtio_user_dev_setup(struct virtio_user_dev *dev) +{ + uint32_t q; + + dev->vhostfd = -1; + dev->vhostfds = NULL; + dev->tapfds = NULL; + + if (is_vhost_user_by_type(dev->path)) { + dev->ops = &ops_user; + } else { + dev->ops = &ops_kernel; + + dev->vhostfds = malloc(dev->max_queue_pairs * sizeof(int)); + dev->tapfds = malloc(dev->max_queue_pairs * sizeof(int)); + if (!dev->vhostfds || !dev->tapfds) { + PMD_INIT_LOG(ERR, "Failed to malloc"); + return -1; + } + + for (q = 0; q < dev->max_queue_pairs; ++q) { + dev->vhostfds[q] = -1; + dev->tapfds[q] = -1; + } + } + + if (dev->ops->setup(dev) < 0) + return -1; + + if (virtio_user_dev_init_notify(dev) < 0) + return -1; + + if (virtio_user_fill_intr_handle(dev) < 0) + return -1; + + return 0; +} + +/* Use below macro to filter features from vhost backend */ +#define VIRTIO_USER_SUPPORTED_FEATURES \ + (1ULL << VIRTIO_NET_F_MAC | \ + 1ULL << VIRTIO_NET_F_STATUS | \ + 1ULL << VIRTIO_NET_F_MQ | \ + 1ULL << VIRTIO_NET_F_CTRL_MAC_ADDR | \ + 1ULL << VIRTIO_NET_F_CTRL_VQ | \ + 1ULL << VIRTIO_NET_F_CTRL_RX | \ + 1ULL << VIRTIO_NET_F_CTRL_VLAN | \ + 1ULL << VIRTIO_NET_F_CSUM | \ + 1ULL << VIRTIO_NET_F_HOST_TSO4 | \ + 1ULL << VIRTIO_NET_F_HOST_TSO6 | \ + 1ULL << VIRTIO_NET_F_MRG_RXBUF | \ + 1ULL << VIRTIO_RING_F_INDIRECT_DESC | \ + 1ULL << VIRTIO_NET_F_GUEST_CSUM | \ + 1ULL << VIRTIO_NET_F_GUEST_TSO4 | \ + 1ULL << VIRTIO_NET_F_GUEST_TSO6 | \ + 1ULL << VIRTIO_F_VERSION_1) + +int +virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, + int cq, int queue_size, const char *mac, char **ifname) +{ snprintf(dev->path, PATH_MAX, "%s", path); dev->max_queue_pairs = queues; dev->queue_pairs = 1; /* mq disabled by default */ dev->queue_size = queue_size; dev->mac_specified = 0; parse_mac(dev, mac); - dev->vhostfd = -1; - for (i = 0; i < VIRTIO_MAX_VIRTQUEUES * 2 + 1; ++i) { - dev->kickfds[i] = -1; - dev->callfds[i] = -1; + if (*ifname) { + dev->ifname = *ifname; + *ifname = NULL; } - dev->vhostfd = vhost_user_setup(dev->path); - if (dev->vhostfd < 0) { + if (virtio_user_dev_setup(dev) < 0) { PMD_INIT_LOG(ERR, "backend set up fails"); return -1; } - if (vhost_user_sock(dev->vhostfd, VHOST_USER_SET_OWNER, NULL) < 0) { + if (dev->ops->send_request(dev, VHOST_USER_SET_OWNER, NULL) < 0) { PMD_INIT_LOG(ERR, "set_owner fails: %s", strerror(errno)); return -1; } - if (vhost_user_sock(dev->vhostfd, VHOST_USER_GET_FEATURES, + if (dev->ops->send_request(dev, VHOST_USER_GET_FEATURES, &dev->device_features) < 0) { PMD_INIT_LOG(ERR, "get_features failed: %s", strerror(errno)); return -1; @@ -268,12 +383,11 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, dev->device_features &= ~(1ull << VIRTIO_NET_F_CTRL_MAC_ADDR); } - if (dev->max_queue_pairs > 1) { - if (!(dev->features & VHOST_USER_MQ)) { - PMD_INIT_LOG(ERR, "MQ not supported by the backend"); - return -1; - } - } + /* The backend will not report this feature, we add it explicitly */ + if (is_vhost_user_by_type(dev->path)) + dev->device_features |= (1ull << VIRTIO_NET_F_STATUS); + + dev->device_features &= VIRTIO_USER_SUPPORTED_FEATURES; return 0; } @@ -281,7 +395,25 @@ virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, void virtio_user_dev_uninit(struct virtio_user_dev *dev) { + uint32_t i; + + virtio_user_stop_device(dev); + + for (i = 0; i < dev->max_queue_pairs * 2; ++i) { + close(dev->callfds[i]); + close(dev->kickfds[i]); + } + close(dev->vhostfd); + + if (dev->vhostfds) { + for (i = 0; i < dev->max_queue_pairs; ++i) + close(dev->vhostfds[i]); + free(dev->vhostfds); + free(dev->tapfds); + } + + free(dev->ifname); } static uint8_t @@ -297,9 +429,9 @@ virtio_user_handle_mq(struct virtio_user_dev *dev, uint16_t q_pairs) } for (i = 0; i < q_pairs; ++i) - ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 1); + ret |= dev->ops->enable_qp(dev, i, 1); for (i = q_pairs; i < dev->max_queue_pairs; ++i) - ret |= vhost_user_enable_queue_pair(dev->vhostfd, i, 0); + ret |= dev->ops->enable_qp(dev, i, 0); dev->queue_pairs = q_pairs; diff --git a/drivers/net/virtio/virtio_user/virtio_user_dev.h b/drivers/net/virtio/virtio_user/virtio_user_dev.h index 28fc788e..8361b6bd 100644 --- a/drivers/net/virtio/virtio_user/virtio_user_dev.h +++ b/drivers/net/virtio/virtio_user/virtio_user_dev.h @@ -37,11 +37,20 @@ #include <limits.h> #include "../virtio_pci.h" #include "../virtio_ring.h" +#include "vhost.h" struct virtio_user_dev { + /* for vhost_user backend */ int vhostfd; - int callfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; - int kickfds[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + + /* for vhost_kernel backend */ + char *ifname; + int *vhostfds; + int *tapfds; + + /* for both vhost_user and vhost_kernel */ + int callfds[VIRTIO_MAX_VIRTQUEUES]; + int kickfds[VIRTIO_MAX_VIRTQUEUES]; int mac_specified; uint32_t max_queue_pairs; uint32_t queue_pairs; @@ -51,15 +60,18 @@ struct virtio_user_dev { */ uint64_t device_features; /* supported features by device */ uint8_t status; + uint8_t port_id; uint8_t mac_addr[ETHER_ADDR_LEN]; char path[PATH_MAX]; - struct vring vrings[VIRTIO_MAX_VIRTQUEUES * 2 + 1]; + struct vring vrings[VIRTIO_MAX_VIRTQUEUES]; + struct virtio_user_backend_ops *ops; }; +int is_vhost_user_by_type(const char *path); int virtio_user_start_device(struct virtio_user_dev *dev); int virtio_user_stop_device(struct virtio_user_dev *dev); int virtio_user_dev_init(struct virtio_user_dev *dev, char *path, int queues, - int cq, int queue_size, const char *mac); + int cq, int queue_size, const char *mac, char **ifname); void virtio_user_dev_uninit(struct virtio_user_dev *dev); void virtio_user_handle_cq(struct virtio_user_dev *dev, uint16_t queue_idx); #endif diff --git a/drivers/net/virtio/virtio_user_ethdev.c b/drivers/net/virtio/virtio_user_ethdev.c index 013600e4..280406c0 100644 --- a/drivers/net/virtio/virtio_user_ethdev.c +++ b/drivers/net/virtio/virtio_user_ethdev.c @@ -34,10 +34,15 @@ #include <stdint.h> #include <sys/types.h> #include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/socket.h> #include <rte_malloc.h> #include <rte_kvargs.h> +#include <rte_ethdev_vdev.h> #include <rte_vdev.h> +#include <rte_alarm.h> #include "virtio_ethdev.h" #include "virtio_logs.h" @@ -50,6 +55,17 @@ ((struct virtio_user_dev *)(hw)->virtio_user_dev) static void +virtio_user_delayed_handler(void *param) +{ + struct virtio_hw *hw = (struct virtio_hw *)param; + struct rte_eth_dev *dev = &rte_eth_devices[hw->port_id]; + + rte_intr_callback_unregister(dev->intr_handle, + virtio_interrupt_handler, + dev); +} + +static void virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, void *dst, int length) { @@ -63,8 +79,37 @@ virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, return; } - if (offset == offsetof(struct virtio_net_config, status)) + if (offset == offsetof(struct virtio_net_config, status)) { + char buf[128]; + + if (dev->vhostfd >= 0) { + int r; + int flags; + + flags = fcntl(dev->vhostfd, F_GETFL); + fcntl(dev->vhostfd, F_SETFL, flags | O_NONBLOCK); + r = recv(dev->vhostfd, buf, 128, MSG_PEEK); + if (r == 0 || (r < 0 && errno != EAGAIN)) { + dev->status &= (~VIRTIO_NET_S_LINK_UP); + PMD_DRV_LOG(ERR, "virtio-user port %u is down", + hw->port_id); + /* Only client mode is available now. Once the + * connection is broken, it can never be up + * again. Besides, this function could be called + * in the process of interrupt handling, + * callback cannot be unregistered here, set an + * alarm to do it. + */ + rte_eal_alarm_set(1, + virtio_user_delayed_handler, + (void *)hw); + } else { + dev->status |= VIRTIO_NET_S_LINK_UP; + } + fcntl(dev->vhostfd, F_SETFL, flags & (~O_NONBLOCK)); + } *(uint16_t *)dst = dev->status; + } if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) *(uint16_t *)dst = dev->max_queue_pairs; @@ -82,7 +127,7 @@ virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, for (i = 0; i < ETHER_ADDR_LEN; ++i) dev->mac_addr[i] = ((const uint8_t *)src)[i]; else - PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d", offset, length); } @@ -135,17 +180,26 @@ virtio_user_set_features(struct virtio_hw *hw, uint64_t features) static uint8_t virtio_user_get_isr(struct virtio_hw *hw __rte_unused) { - /* When config interrupt happens, driver calls this function to query - * what kinds of change happen. Interrupt mode not supported for now. + /* rxq interrupts and config interrupt are separated in virtio-user, + * here we only report config change. */ - return 0; + return VIRTIO_PCI_ISR_CONFIG; } static uint16_t virtio_user_set_config_irq(struct virtio_hw *hw __rte_unused, uint16_t vec __rte_unused) { - return VIRTIO_MSI_NO_VECTOR; + return 0; +} + +static uint16_t +virtio_user_set_queue_irq(struct virtio_hw *hw __rte_unused, + struct virtqueue *vq __rte_unused, + uint16_t vec) +{ + /* pretend we have done that */ + return vec; } /* This function is to get the queue size, aka, number of descs, of a specified @@ -212,7 +266,7 @@ virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) } if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) - PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + PMD_DRV_LOG(ERR, "failed to kick backend: %s", strerror(errno)); } @@ -226,6 +280,7 @@ const struct virtio_pci_ops virtio_user_ops = { .set_features = virtio_user_set_features, .get_isr = virtio_user_get_isr, .set_config_irq = virtio_user_set_config_irq, + .set_queue_irq = virtio_user_set_queue_irq, .get_queue_num = virtio_user_get_queue_num, .setup_queue = virtio_user_setup_queue, .del_queue = virtio_user_del_queue, @@ -243,6 +298,8 @@ static const char *valid_args[] = { VIRTIO_USER_ARG_PATH, #define VIRTIO_USER_ARG_QUEUE_SIZE "queue_size" VIRTIO_USER_ARG_QUEUE_SIZE, +#define VIRTIO_USER_ARG_INTERFACE_NAME "iface" + VIRTIO_USER_ARG_INTERFACE_NAME, NULL }; @@ -259,6 +316,9 @@ get_string_arg(const char *key __rte_unused, *(char **)extra_args = strdup(value); + if (!*(char **)extra_args) + return -ENOMEM; + return 0; } @@ -274,28 +334,24 @@ get_integer_arg(const char *key __rte_unused, return 0; } +static struct rte_vdev_driver virtio_user_driver; + static struct rte_eth_dev * -virtio_user_eth_dev_alloc(const char *name) +virtio_user_eth_dev_alloc(struct rte_vdev_device *vdev) { struct rte_eth_dev *eth_dev; struct rte_eth_dev_data *data; struct virtio_hw *hw; struct virtio_user_dev *dev; - eth_dev = rte_eth_dev_allocate(name); + eth_dev = rte_eth_vdev_allocate(vdev, sizeof(*hw)); if (!eth_dev) { PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); return NULL; } data = eth_dev->data; - - hw = rte_zmalloc(NULL, sizeof(*hw), 0); - if (!hw) { - PMD_INIT_LOG(ERR, "malloc virtio_hw failed"); - rte_eth_dev_release_port(eth_dev); - return NULL; - } + hw = eth_dev->data->dev_private; dev = rte_zmalloc(NULL, sizeof(*dev), 0); if (!dev) { @@ -306,17 +362,17 @@ virtio_user_eth_dev_alloc(const char *name) } hw->port_id = data->port_id; + dev->port_id = data->port_id; virtio_hw_internal[hw->port_id].vtpci_ops = &virtio_user_ops; - hw->use_msix = 0; + /* + * MSIX is required to enable LSC (see virtio_init_device). + * Here just pretend that we support msix. + */ + hw->use_msix = 1; hw->modern = 0; hw->use_simple_rxtx = 0; hw->virtio_user_dev = dev; - data->dev_private = hw; - data->numa_node = SOCKET_ID_ANY; - data->kdrv = RTE_KDRV_NONE; data->dev_flags = RTE_ETH_DEV_DETACHABLE; - eth_dev->pci_dev = NULL; - eth_dev->driver = NULL; return eth_dev; } @@ -336,7 +392,7 @@ virtio_user_eth_dev_free(struct rte_eth_dev *eth_dev) * Returns 0 on success. */ static int -virtio_user_pmd_probe(const char *name, const char *params) +virtio_user_pmd_probe(struct rte_vdev_device *dev) { struct rte_kvargs *kvlist = NULL; struct rte_eth_dev *eth_dev; @@ -345,16 +401,11 @@ virtio_user_pmd_probe(const char *name, const char *params) uint64_t cq = VIRTIO_USER_DEF_CQ_EN; uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ; char *path = NULL; + char *ifname = NULL; char *mac_addr = NULL; int ret = -1; - if (!params || params[0] == '\0') { - PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", - VIRTIO_USER_ARG_QUEUE_SIZE); - goto end; - } - - kvlist = rte_kvargs_parse(params, valid_args); + kvlist = rte_kvargs_parse(rte_vdev_device_args(dev), valid_args); if (!kvlist) { PMD_INIT_LOG(ERR, "error when parsing param"); goto end; @@ -368,11 +419,27 @@ virtio_user_pmd_probe(const char *name, const char *params) goto end; } } else { - PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user\n", + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", VIRTIO_USER_ARG_QUEUE_SIZE); goto end; } + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_INTERFACE_NAME) == 1) { + if (is_vhost_user_by_type(path)) { + PMD_INIT_LOG(ERR, + "arg %s applies only to vhost-kernel backend", + VIRTIO_USER_ARG_INTERFACE_NAME); + goto end; + } + + if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_INTERFACE_NAME, + &get_string_arg, &ifname) < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_INTERFACE_NAME); + goto end; + } + } + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) { if (rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, &get_string_arg, &mac_addr) < 0) { @@ -416,21 +483,34 @@ virtio_user_pmd_probe(const char *name, const char *params) goto end; } - eth_dev = virtio_user_eth_dev_alloc(name); - if (!eth_dev) { - PMD_INIT_LOG(ERR, "virtio_user fails to alloc device"); + if (queues > VIRTIO_MAX_VIRTQUEUE_PAIRS) { + PMD_INIT_LOG(ERR, "arg %s %" PRIu64 " exceeds the limit %u", + VIRTIO_USER_ARG_QUEUES_NUM, queues, + VIRTIO_MAX_VIRTQUEUE_PAIRS); goto end; } - hw = eth_dev->data->dev_private; - if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, - queue_size, mac_addr) < 0) { - PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); - virtio_user_eth_dev_free(eth_dev); - goto end; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = virtio_user_eth_dev_alloc(dev); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "virtio_user fails to alloc device"); + goto end; + } + + hw = eth_dev->data->dev_private; + if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, + queue_size, mac_addr, &ifname) < 0) { + PMD_INIT_LOG(ERR, "virtio_user_dev_init fails"); + virtio_user_eth_dev_free(eth_dev); + goto end; + } + } else { + eth_dev = rte_eth_dev_attach_secondary(rte_vdev_device_name(dev)); + if (!eth_dev) + goto end; } - /* previously called by rte_eal_pci_probe() for physical dev */ + /* previously called by rte_pci_probe() for physical dev */ if (eth_virtio_dev_init(eth_dev) < 0) { PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); virtio_user_eth_dev_free(eth_dev); @@ -445,21 +525,25 @@ end: free(path); if (mac_addr) free(mac_addr); + if (ifname) + free(ifname); return ret; } /** Called by rte_eth_dev_detach() */ static int -virtio_user_pmd_remove(const char *name) +virtio_user_pmd_remove(struct rte_vdev_device *vdev) { + const char *name; struct rte_eth_dev *eth_dev; struct virtio_hw *hw; struct virtio_user_dev *dev; - if (!name) + if (!vdev) return -EINVAL; - PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name); + name = rte_vdev_device_name(vdev); + PMD_DRV_LOG(INFO, "Un-Initializing %s", name); eth_dev = rte_eth_dev_allocated(name); if (!eth_dev) return -ENODEV; @@ -490,4 +574,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_virtio_user, "mac=<mac addr> " "cq=<int> " "queue_size=<int> " - "queues=<int>"); + "queues=<int> " + "iface=<string>"); diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c index 7f60e3ef..9ad77b8a 100644 --- a/drivers/net/virtio/virtqueue.c +++ b/drivers/net/virtio/virtqueue.c @@ -38,17 +38,6 @@ #include "virtio_logs.h" #include "virtio_pci.h" -void -virtqueue_disable_intr(struct virtqueue *vq) -{ - /* - * Set VRING_AVAIL_F_NO_INTERRUPT to hint host - * not to interrupt when it consumes packets - * Note: this is only considered a hint to the host - */ - vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; -} - /* * Two types of mbuf to be cleaned: * 1) mbuf that has been consumed by backend but not used by virtio. diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h index b1070e05..2e120861 100644 --- a/drivers/net/virtio/virtqueue.h +++ b/drivers/net/virtio/virtqueue.h @@ -71,8 +71,14 @@ struct rte_mbuf; /** * Return the physical address (or virtual address in case of * virtio-user) of mbuf data buffer. + * + * The address is firstly casted to the word size (sizeof(uintptr_t)) + * before casting it to uint64_t. This is to make it work with different + * combination of word size (64 bit and 32 bit) and virtio device + * (virtio-pci and virtio-user). */ -#define VIRTIO_MBUF_ADDR(mb, vq) (*(uint64_t *)((uintptr_t)(mb) + (vq)->offset)) +#define VIRTIO_MBUF_ADDR(mb, vq) \ + ((uint64_t)(*(uintptr_t *)((uintptr_t)(mb) + (vq)->offset))) #else #define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_physaddr) #endif @@ -274,7 +280,21 @@ vring_desc_init(struct vring_desc *dp, uint16_t n) /** * Tell the backend not to interrupt us. */ -void virtqueue_disable_intr(struct virtqueue *vq); +static inline void +virtqueue_disable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +/** + * Tell the backend to interrupt us. + */ +static inline void +virtqueue_enable_intr(struct virtqueue *vq) +{ + vq->vq_ring.avail->flags &= (~VRING_AVAIL_F_NO_INTERRUPT); +} + /** * Dump virtqueue internal structures, for debug purpose only. */ |